/* -*-C-*- */
/* Several tiles below read arguments into locals that are never used afterwards
 * (e.g. a size argument), so the unused-variable warning is silenced. */
#pragma GCC diagnostic ignored "-Wunused-variable"
/* Report an error through MVM_oops with a printf-style message. Relies on a
 * variable named `tc` being in scope at the point of use. The do/while(0)
 * wrapper lets the macro be used as a single statement. */
#define DIE(...) do { MVM_oops(tc, __VA_ARGS__); } while (0)

/* NB: The rax/eax/ax/al/ah register is *reserved* for internal use in tiles by
 * the register allocator. Using rax will never overwrite an allocated value */

/* basic memory traffic tiles */
/* addr: out = base + constant offset. Uses lea, so no memory is accessed.
 * values[0] = output register, values[1] = base register, args[0] = offset. */
MVM_JIT_TILE_DECL(addr) {
    MVMint32 mem_ofs  = tile->args[0];
    MVMint8  base_reg = tile->values[1];
    MVMint8  dest_reg = tile->values[0];
    | lea Rq(dest_reg), [Rq(base_reg)+mem_ofs];
}


/* idx: out = base + index * scale, computed with lea (no memory access).
 * values[0] = output, values[1] = base, values[2] = index, args[0] = scale.
 * Only a scale of 8 is implemented; any other scale is reported via DIE. */
MVM_JIT_TILE_DECL(idx) {
    MVMint8 dest_reg  = tile->values[0];
    MVMint8 base_reg  = tile->values[1];
    MVMint8 index_reg = tile->values[2];
    MVMint8 scale     = tile->args[0];
    if (scale != 8) {
        DIE("Unsupported scale: %d", scale);
    } else {
        | lea Rq(dest_reg), [Rq(base_reg)+Rq(index_reg)*8];
    }
}


/* const_reg: load a 32-bit constant (args[0]) into the output register
 * (values[0]) with a 64-bit mov. args[1] (the size) is read but not used. */
MVM_JIT_TILE_DECL(const_reg) {
    MVMint32 imm_val   = tile->args[0];
    MVMint32 imm_size  = tile->args[1];   /* unused */
    MVMint8  dest_reg  = tile->values[0];
    | mov Rq(dest_reg), imm_val;
}

/* const_large: load a 64-bit constant into the output register (values[0]).
 * args[0] indexes tree->constants; the integer member (.i) is loaded with
 * mov64. */
MVM_JIT_TILE_DECL(const_large) {
    MVMint64 imm_val  = tree->constants[tile->args[0]].i;
    MVMint8  dest_reg = tile->values[0];
    | mov64 Rq(dest_reg), imm_val;
}

/* const_num: load a floating point constant into an xmm register.
 * args[0] indexes tree->constants (member .n). The raw bytes of the value are
 * emitted into the .data section behind local label 5, and the code section
 * then loads them with movsd. */
MVM_JIT_TILE_DECL(const_num) {
    /* xmm register index: the value register number relative to XMM0 */
    MVMuint8 out = tile->values[0] - MVM_JIT_REG(XMM0);
    MVMnum64 val = tree->constants[tile->args[0]].n;
    unsigned char bytes[sizeof(val)];
    size_t i;
    /* copy the object representation so it can be emitted byte by byte */
    memcpy(bytes, &val, sizeof(val));
    |.data;
    |5:
    for (i = 0; i < sizeof(bytes); i++) {
        |.byte bytes[i];
    }
    |.code;
    /* <5 refers backwards to the label emitted just above */
    | movsd xmm(out), qword [<5];
}

/* load_reg: out = [base], with the access width chosen by args[0]
 * (1, 2, 4 or 8 bytes). values[0] = output, values[1] = address register.
 * Any other width is reported via DIE. */
MVM_JIT_TILE_DECL(load_reg) {
    MVMint8  dest_reg = tile->values[0];
    MVMint8  addr_reg = tile->values[1];
    MVMint32 width    = tile->args[0];
    if (width == 1) {
        | mov Rb(dest_reg), [Rq(addr_reg)];
    } else if (width == 2) {
        | mov Rw(dest_reg), [Rq(addr_reg)];
    } else if (width == 4) {
        | mov Rd(dest_reg), [Rq(addr_reg)];
    } else if (width == 8) {
        | mov Rq(dest_reg), [Rq(addr_reg)];
    } else {
        DIE("Unsupported load size: %d\n", width);
    }
}

/* load_addr: out = [base + offset].
 * values[0] = output, values[1] = base, args[0] = offset, args[1] = width in
 * bytes (1, 2, 4 or 8). Any other width is reported via DIE. */
MVM_JIT_TILE_DECL(load_addr) {
    MVMint8  dest_reg = tile->values[0];
    MVMint8  base_reg = tile->values[1];
    MVMint32 mem_ofs  = tile->args[0];
    MVMint32 width    = tile->args[1];
    switch (width) {
    case 8:
        | mov Rq(dest_reg), qword [Rq(base_reg)+mem_ofs];
        break;
    case 4:
        | mov Rd(dest_reg), dword [Rq(base_reg)+mem_ofs];
        break;
    case 2:
        | mov Rw(dest_reg), word [Rq(base_reg)+mem_ofs];
        break;
    case 1:
        | mov Rb(dest_reg), byte [Rq(base_reg)+mem_ofs];
        break;
    default:
        DIE("Unsupported load size: %d\n", width);
    }
}

/* load_idx: out = [base + index * 8].
 * values[0] = output, values[1] = base, values[2] = index, args[0] = scale
 * (only 8 is supported), args[1] = width in bytes (1, 2, 4 or 8). */
MVM_JIT_TILE_DECL(load_idx) {
    MVMint8  dest_reg  = tile->values[0];
    MVMint8  base_reg  = tile->values[1];
    MVMint8  index_reg = tile->values[2];
    MVMint8  scale     = tile->args[0];
    MVMint32 width     = tile->args[1];
    if (scale != 8) {
        DIE("Unsupported scale size: %d\n", scale);
    }
    if (width == 1) {
        | mov Rb(dest_reg), byte [Rq(base_reg)+Rq(index_reg)*8];
    } else if (width == 2) {
        | mov Rw(dest_reg), word [Rq(base_reg)+Rq(index_reg)*8];
    } else if (width == 4) {
        | mov Rd(dest_reg), dword [Rq(base_reg)+Rq(index_reg)*8];
    } else if (width == 8) {
        | mov Rq(dest_reg), qword [Rq(base_reg)+Rq(index_reg)*8];
    } else {
        DIE("Unsupported load size: %d\n", width);
    }
}

/* load_num: load a 64-bit floating point value from [addr] into an xmm
 * register. values[0] = output (mapped through REG_NUM), values[1] = address. */
MVM_JIT_TILE_DECL(load_num) {
    MVMint8 addr_reg = tile->values[1];
    MVMint8 xmm_dest = REG_NUM(tile->values[0]);
    | movsd xmm(xmm_dest), qword [Rq(addr_reg)];
}

/* load_num_addr: load a 64-bit floating point value from [addr + offset] into
 * an xmm register. values[0] = output (mapped through REG_NUM),
 * values[1] = base address, args[0] = offset. */
MVM_JIT_TILE_DECL(load_num_addr) {
    MVMint32 mem_ofs  = tile->args[0];
    MVMint8  addr_reg = tile->values[1];
    MVMint8  xmm_dest = REG_NUM(tile->values[0]);
    | movsd xmm(xmm_dest), qword [Rq(addr_reg)+mem_ofs];
}


/* store: [base] = value, with the width chosen by args[0] (1, 2, 4 or 8
 * bytes). values[1] = address register, values[2] = value register.
 * Any other width is reported via DIE. */
MVM_JIT_TILE_DECL(store) {
    MVMint8  addr_reg = tile->values[1];
    MVMint8  src_reg  = tile->values[2];
    MVMint32 width    = tile->args[0];
    if (width == 1) {
        | mov byte [Rq(addr_reg)], Rb(src_reg);
    } else if (width == 2) {
        | mov word [Rq(addr_reg)], Rw(src_reg);
    } else if (width == 4) {
        | mov dword [Rq(addr_reg)], Rd(src_reg);
    } else if (width == 8) {
        | mov qword [Rq(addr_reg)], Rq(src_reg);
    } else {
        DIE("Unsupported store size: %d\n", width);
    }
}

/* store_addr: [base + offset] = value.
 * values[1] = base, values[2] = value, args[0] = offset, args[1] = width in
 * bytes (1, 2, 4 or 8). Any other width is reported via DIE. */
MVM_JIT_TILE_DECL(store_addr) {
    MVMint32 mem_ofs  = tile->args[0];
    MVMint32 width    = tile->args[1];
    MVMint8  base_reg = tile->values[1];
    MVMint8  src_reg  = tile->values[2];
    switch (width) {
    case 8:
        | mov qword [Rq(base_reg)+mem_ofs], Rq(src_reg);
        break;
    case 4:
        | mov dword [Rq(base_reg)+mem_ofs], Rd(src_reg);
        break;
    case 2:
        | mov word [Rq(base_reg)+mem_ofs], Rw(src_reg);
        break;
    case 1:
        | mov byte [Rq(base_reg)+mem_ofs], Rb(src_reg);
        break;
    default:
        DIE("Unsupported store size: %d\n", width);
    }
}

/* store_idx: [base + index * 8] = value.
 * values[1] = base, values[2] = index, values[3] = value, args[0] = scale
 * (only 8 is implemented), args[1] = width in bytes (1, 2, 4 or 8). */
MVM_JIT_TILE_DECL(store_idx) {
    MVMint8  base_reg  = tile->values[1];
    MVMint8  index_reg = tile->values[2];
    MVMint8  src_reg   = tile->values[3];
    MVMint8  scale     = tile->args[0];
    MVMint32 width     = tile->args[1];
    if (scale != 8) {
        DIE("Scale %d NYI\n", scale);
    }
    if (width == 1) {
        | mov byte [Rq(base_reg)+Rq(index_reg)*8], Rb(src_reg);
    } else if (width == 2) {
        | mov word [Rq(base_reg)+Rq(index_reg)*8], Rw(src_reg);
    } else if (width == 4) {
        | mov dword [Rq(base_reg)+Rq(index_reg)*8], Rd(src_reg);
    } else if (width == 8) {
        | mov qword [Rq(base_reg)+Rq(index_reg)*8], Rq(src_reg);
    } else {
        DIE("Unsupported store size: %d\n", width);
    }
}

/* store_num: store a 64-bit floating point value from an xmm register to
 * [addr]. values[1] = address register, values[2] = value register; the xmm
 * index is the value register number relative to XMM0. */
MVM_JIT_TILE_DECL(store_num) {
    MVMint8 xmm_src  = tile->values[2] - MVM_JIT_REG(XMM0);
    MVMint8 addr_reg = tile->values[1];
    | movsd qword [Rq(addr_reg)], xmm(xmm_src);
}

/* store_num_addr: store a 64-bit floating point value from an xmm register to
 * [addr + offset]. values[1] = base address, values[2] = value register
 * (mapped through REG_NUM), args[0] = offset. */
MVM_JIT_TILE_DECL(store_num_addr) {
    MVMint32 mem_ofs  = tile->args[0];
    MVMint8  xmm_src  = REG_NUM(tile->values[2]);
    MVMint8  addr_reg = tile->values[1];
    | movsd qword [Rq(addr_reg)+mem_ofs], xmm(xmm_src);
}


/* cast_signed: sign-extend a register value to a wider size.
 * args[0] = target size, args[1] = source size (both in bytes);
 * values[0] = output register, values[1] = input register.
 *
 * Only widening casts are handled: 1 -> 2, 4, 8; 2 -> 4, 8; 4 -> 8. The two
 * sizes are folded into a single switch key, src | (dst << 3):
 *   1 -> 2 : 17     1 -> 4 : 33     1 -> 8 : 65
 *   2 -> 4 : 34     2 -> 8 : 66     4 -> 8 : 68
 * Any other combination is reported via DIE. */
MVM_JIT_TILE_DECL(cast_signed) {
    MVMint8  dest_reg  = tile->values[0];
    MVMint8  src_reg   = tile->values[1];
    MVMint32 dst_bytes = tile->args[0];
    MVMint32 src_bytes = tile->args[1];
    MVMint32 conv_key  = (dst_bytes << 3) | (src_bytes);

    switch (conv_key) {
    /* from 1 byte */
    case 17:
        | movsx Rw(dest_reg), Rb(src_reg);
        break;
    case 33:
        | movsx Rd(dest_reg), Rb(src_reg);
        break;
    case 65:
        | movsx Rq(dest_reg), Rb(src_reg);
        break;
    /* from 2 bytes */
    case 34:
        | movsx Rd(dest_reg), Rw(src_reg);
        break;
    case 66:
        | movsx Rq(dest_reg), Rw(src_reg);
        break;
    /* from 4 bytes */
    case 68:
        /* movsx is apparently not defined for doubleword-to-quadword
         * conversions, so the value is routed through eax and sign-extended
         * into rax with cdqe. rax is reserved for use inside tiles (see the
         * note at the top of this file). */
        | mov eax, Rd(src_reg);
        | cdqe;
        | mov Rq(dest_reg), rax;
        break;
    default:
        DIE("Unsupported signed cast %d -> %d\n", src_bytes, dst_bytes);
    }
}

/* cast_unsigned: zero-extend (or truncate) a register value to another size.
 * args[0] = target size, args[1] = source size (both in bytes);
 * values[0] = output register, values[1] = input register.
 *
 * The sizes are folded into one switch key, from_size | (to_size << 3).
 * Widening uses the same keys as the signed cast:
 *   1 -> 2 : 17     1 -> 4 : 33     1 -> 8 : 65
 *   2 -> 4 : 34     2 -> 8 : 66     4 -> 8 : 68
 * Casting down is supported as well:
 *   8 -> 1 : 8  (1<<3 = 8, 8 | 8 = 8)
 *   2 -> 1 : 10     4 -> 1 : 12
 *   4 -> 2 : 20     8 -> 2 : 24
 *   8 -> 4 : 40
 * Any other combination is reported via DIE. */
MVM_JIT_TILE_DECL(cast_unsigned) {
    MVMint32 to_size   = tile->args[0];
    MVMint32 from_size = tile->args[1];

    MVMint8  to_reg    = tile->values[0];
    MVMint8  from_reg  = tile->values[1];

    MVMint32 size_conv = (from_size) | (to_size << 3);
    switch (size_conv) {
    case 8:  /* 8 -> 1 */
    case 10: /* 2 -> 1 */
    case 12: /* 4 -> 1 */
        | movzx Rd(to_reg), Rb(from_reg);
        break;
    case 17: /* 1 -> 2 */
        | movzx Rw(to_reg), Rb(from_reg);
        break;
    case 20: /* 4 -> 2 */
    case 24: /* 8 -> 2 */
        /* Keep the low word of the *source*; reading to_reg here would ignore
         * the input value entirely. */
        | movzx Rd(to_reg), Rw(from_reg);
        break;
    case 33: /* 1 -> 4 */
        | movzx Rd(to_reg), Rb(from_reg);
        break;
    case 34: /* 2 -> 4 */
        | movzx Rd(to_reg), Rw(from_reg);
        break;
    case 40: /* 8 -> 4 */
        | mov Rd(to_reg), Rd(from_reg);
        break;
    case 65: /* 1 -> 8 */
        | movzx Rq(to_reg), Rb(from_reg);
        break;
    case 66: /* 2 -> 8 */
        | movzx Rq(to_reg), Rw(from_reg);
        break;
    case 68: /* 4 -> 8 */
        /* In contrast to signed cast, nothing special is necessary to cast
         * unsigned doublewords to quadwords, because using the lower 4 bytes
         * automatically clears the upper 4 */
        | mov Rd(to_reg), Rd(from_reg);
        break;
    default:
        DIE("Unsupported unsigned cast %d -> %d (%d)\n", from_size, to_size, size_conv);
    }

}


/* Logic is the same as cast above except loading from memory instead of reg */
/* See comments in cast above for more detail */
/* cast_signed_load_addr: sign-extending load from [base + offset].
 * args[0] = offset, args[1] = load size (read but unused), args[2] = target
 * size, args[3] = source size; values[0] = output, values[1] = base.
 * The switch key is src | (dst << 3), covering the widening casts
 * 1 -> 2/4/8, 2 -> 4/8 and 4 -> 8; anything else is reported via DIE. */
MVM_JIT_TILE_DECL(cast_signed_load_addr) {
    MVMint8  dest_reg  = tile->values[0];
    MVMint8  base_reg  = tile->values[1];

    MVMint32 mem_ofs   = tile->args[0];
    MVMint32 load_size = tile->args[1];   /* unused */
    MVMint32 dst_bytes = tile->args[2];
    MVMint32 src_bytes = tile->args[3];

    MVMint32 conv_key  = (dst_bytes << 3) | (src_bytes);
    switch (conv_key) {
    /* from a byte in memory */
    case 17:
        | movsx Rw(dest_reg), byte [Rq(base_reg)+mem_ofs];
        break;
    case 33:
        | movsx Rd(dest_reg), byte [Rq(base_reg)+mem_ofs];
        break;
    case 65:
        | movsx Rq(dest_reg), byte [Rq(base_reg)+mem_ofs];
        break;
    /* from a word in memory */
    case 34:
        | movsx Rd(dest_reg), word [Rq(base_reg)+mem_ofs];
        break;
    case 66:
        | movsx Rq(dest_reg), word [Rq(base_reg)+mem_ofs];
        break;
    /* from a doubleword in memory: load into eax, extend into rax via cdqe */
    case 68:
        | mov eax, dword [Rq(base_reg)+mem_ofs];
        | cdqe;
        | mov Rq(dest_reg), rax;
        break;
    default:
        DIE("Unsupported signed cast with load %d -> %d\n", src_bytes, dst_bytes);
    }
}

/* Logic is the same as cast above except loading from memory instead of reg */
/* See comments in cast above for more detail */
/* cast_unsigned_load_addr: zero-extending (or truncating) load from
 * [base + offset].
 * args[0] = offset, args[1] = load size (read but unused), args[2] = target
 * size, args[3] = source size; values[0] = output, values[1] = base.
 * The switch key is src | (dst << 3); unsupported keys are reported via DIE. */
MVM_JIT_TILE_DECL(cast_unsigned_load_addr) {
    MVMint8  dest_reg  = tile->values[0];
    MVMint8  base_reg  = tile->values[1];

    MVMint32 mem_ofs   = tile->args[0];
    MVMint32 load_size = tile->args[1];   /* unused */
    MVMint32 dst_bytes = tile->args[2];
    MVMint32 src_bytes = tile->args[3];

    MVMint32 conv_key  = (dst_bytes << 3) | (src_bytes);
    switch (conv_key) {
    /* narrowing to one byte */
    case 12: /* 4 -> 1 */
    case 10: /* 2 -> 1 */
    case 8:  /* 8 -> 1 */
        | movzx Rd(dest_reg), byte [Rq(base_reg)+mem_ofs];
        break;
    /* narrowing to two bytes */
    case 24: /* 8 -> 2 */
    case 20: /* 4 -> 2 */
        | movzx Rd(dest_reg), word [Rq(base_reg)+mem_ofs];
        break;
    /* narrowing to four bytes */
    case 40: /* 8 -> 4 */
        | mov Rd(dest_reg), dword [Rq(base_reg)+mem_ofs];
        break;
    /* widening from one byte */
    case 17: /* 1 -> 2 */
        | movzx Rw(dest_reg), byte [Rq(base_reg)+mem_ofs];
        break;
    case 33: /* 1 -> 4 */
        | movzx Rd(dest_reg), byte [Rq(base_reg)+mem_ofs];
        break;
    case 65: /* 1 -> 8 */
        | movzx Rq(dest_reg), byte [Rq(base_reg)+mem_ofs];
        break;
    /* widening from two bytes */
    case 34: /* 2 -> 4 */
        | movzx Rd(dest_reg), word [Rq(base_reg)+mem_ofs];
        break;
    case 66: /* 2 -> 8 */
        | movzx Rq(dest_reg), word [Rq(base_reg)+mem_ofs];
        break;
    /* widening from four bytes: a plain 32-bit load */
    case 68: /* 4 -> 8 */
        | mov Rd(dest_reg),  dword [Rq(base_reg)+mem_ofs];
        break;
    default:
        DIE("Unsupported unsigned cast with load %d -> %d (size_conv=%d)\n", src_bytes, dst_bytes, conv_key);
    }
}


/* binary operations have special requirements because x86 is two-operand form, e.g:
 * r0 = r0 <op> r1
 * whereas the JIT uses a three-operand model:
 * r0 = r1 <op> r2 */

/* Prepare the operands of a binary tile for x86's two-operand form.
 * Asserts that the output register (values[0]) is the same as the first input
 * (values[1]), then fills reg[0] with the register number of the output and
 * reg[1] with that of the second input (values[2]), both mapped through
 * REG_NUM. No code is emitted here. */
static void ensure_two_operand_pre(MVMThreadContext *tc, MVMJitCompiler *compiler, MVMJitTile *tile, MVMint8 reg[2]) {
    MVMint8 dest_val  = tile->values[0];
    MVMint8 left_val  = tile->values[1];
    MVMint8 right_val = tile->values[2];
    assert(dest_val == left_val);
    reg[0] = REG_NUM(dest_val);
    reg[1] = REG_NUM(right_val);
}

/* Counterpart of ensure_two_operand_pre, called after a binary tile has
 * emitted its instruction. Currently does nothing. */
static void ensure_two_operand_post(MVMThreadContext *tc, MVMJitCompiler *compiler, MVMJitTile *tile, MVMint8 reg[2]) {
}


/* add_reg: 64-bit integer add, operand[0] += operand[1]. The operands are
 * resolved by ensure_two_operand_pre (output must equal the first input). */
MVM_JIT_TILE_DECL(add_reg) {
    MVMint8 operand[2];
    ensure_two_operand_pre(tc, compiler, tile, operand);
    | add Rq(operand[0]), Rq(operand[1]);
    ensure_two_operand_post(tc, compiler, tile, operand);
}

/* add_const: out = in1 + constant (64-bit add with a 32-bit immediate).
 * values[0] = output, values[1] = input, args[0] = constant, args[1] = its
 * size (read but unused). If output and input differ, the input is copied to
 * the output first. */
MVM_JIT_TILE_DECL(add_const) {
    MVMint32 imm_val  = tile->args[0];
    MVMint32 imm_size = tile->args[1];   /* unused */
    MVMint8  dest_reg = tile->values[0];
    MVMint8  left_reg = tile->values[1];
    if (dest_reg != left_reg) {
        | mov Rq(dest_reg), Rq(left_reg);
    }
    | add Rq(dest_reg), imm_val;
}

/* add_load_addr: out += [base + offset].
 * values[0] = output, values[1] = first input (asserted equal to output),
 * values[2] = base, args[0] = offset, args[1] = width in bytes (1, 2, 4, 8).
 * Any other width is reported via DIE. */
MVM_JIT_TILE_DECL(add_load_addr) {
    MVMint8  dest_reg = tile->values[0];
    MVMint8  left_reg = tile->values[1];
    MVMint8  base_reg = tile->values[2];
    MVMint32 mem_ofs  = tile->args[0];
    MVMint32 width    = tile->args[1];
    assert(dest_reg == left_reg);
    if (width == 1) {
        | add Rb(dest_reg), byte [Rq(base_reg)+mem_ofs];
    } else if (width == 2) {
        | add Rw(dest_reg), word [Rq(base_reg)+mem_ofs];
    } else if (width == 4) {
        | add Rd(dest_reg), dword [Rq(base_reg)+mem_ofs];
    } else if (width == 8) {
        | add Rq(dest_reg), qword [Rq(base_reg)+mem_ofs];
    } else {
        DIE("Unsupported load size: %d\n", width);
    }
}

/* add_load_idx: out += [base + index * 8].
 * values[0] = output, values[1] = first input (asserted equal to output),
 * values[2] = base, values[3] = index, args[0] = scale (only 8 is
 * implemented), args[1] = width in bytes (1, 2, 4 or 8). */
MVM_JIT_TILE_DECL(add_load_idx) {
    MVMint8  dest_reg  = tile->values[0];
    MVMint8  left_reg  = tile->values[1];
    MVMint8  base_reg  = tile->values[2];
    MVMint8  index_reg = tile->values[3];
    MVMint32 scale     = tile->args[0];
    MVMint32 width     = tile->args[1];
    assert(dest_reg == left_reg);
    if (scale != 8) {
        DIE("IDX Scale %d NYI\n", scale);
    }
    if (width == 1) {
        | add Rb(dest_reg), byte [Rq(base_reg)+Rq(index_reg)*8];
    } else if (width == 2) {
        | add Rw(dest_reg), word [Rq(base_reg)+Rq(index_reg)*8];
    } else if (width == 4) {
        | add Rd(dest_reg), dword [Rq(base_reg)+Rq(index_reg)*8];
    } else if (width == 8) {
        | add Rq(dest_reg), qword [Rq(base_reg)+Rq(index_reg)*8];
    } else {
        DIE("Unsupported load size: %d\n", width);
    }
}

/* add_num: scalar double add on xmm registers, operand[0] += operand[1].
 * Operands are resolved by ensure_two_operand_pre. */
MVM_JIT_TILE_DECL(add_num) {
    MVMint8 operand[2];
    ensure_two_operand_pre(tc, compiler, tile, operand);
    | addsd xmm(operand[0]), xmm(operand[1]);
    ensure_two_operand_post(tc, compiler, tile, operand);
}

/* sub_num: scalar double subtract on xmm registers, operand[0] -= operand[1].
 * Operands are resolved by ensure_two_operand_pre. */
MVM_JIT_TILE_DECL(sub_num) {
    MVMint8 operand[2];
    ensure_two_operand_pre(tc, compiler, tile, operand);
    | subsd xmm(operand[0]), xmm(operand[1]);
    ensure_two_operand_post(tc, compiler, tile, operand);
}

/* mul_num: scalar double multiply on xmm registers, operand[0] *= operand[1].
 * Operands are resolved by ensure_two_operand_pre. */
MVM_JIT_TILE_DECL(mul_num) {
    MVMint8 operand[2];
    ensure_two_operand_pre(tc, compiler, tile, operand);
    | mulsd xmm(operand[0]), xmm(operand[1]);
    ensure_two_operand_post(tc, compiler, tile, operand);
}



/* and_reg: 64-bit bitwise AND, operand[0] &= operand[1]. Operands are
 * resolved by ensure_two_operand_pre (output must equal the first input). */
MVM_JIT_TILE_DECL(and_reg) {
    MVMint8 operand[2];
    ensure_two_operand_pre(tc, compiler, tile, operand);
    | and Rq(operand[0]), Rq(operand[1]);
    ensure_two_operand_post(tc, compiler, tile, operand);
}

/* and_const: out &= constant (64-bit AND with a 32-bit immediate).
 * values[0] = output, values[1] = input (asserted equal to output),
 * args[0] = constant, args[1] = its size (read but unused). */
MVM_JIT_TILE_DECL(and_const) {
    MVMint32 imm_val  = tile->args[0];
    MVMint32 imm_size = tile->args[1];   /* unused */
    MVMint8  dest_reg = tile->values[0];
    MVMint8  left_reg = tile->values[1];
    assert(dest_reg == left_reg);
    | and Rq(dest_reg), imm_val;
}

/* and_load_addr: out &= [base + offset].
 * values[0] = output, values[1] = first input (asserted equal to output),
 * values[2] = base, args[0] = offset, args[1] = width in bytes (1, 2, 4, 8).
 * Any other width is reported via DIE. */
MVM_JIT_TILE_DECL(and_load_addr) {
    MVMint8  dest_reg = tile->values[0];
    MVMint8  left_reg = tile->values[1];
    MVMint8  base_reg = tile->values[2];
    MVMint32 mem_ofs  = tile->args[0];
    MVMint32 width    = tile->args[1];
    assert(dest_reg == left_reg);
    if (width == 1) {
        | and Rb(dest_reg), byte [Rq(base_reg)+mem_ofs];
    } else if (width == 2) {
        | and Rw(dest_reg), word [Rq(base_reg)+mem_ofs];
    } else if (width == 4) {
        | and Rd(dest_reg), dword [Rq(base_reg)+mem_ofs];
    } else if (width == 8) {
        | and Rq(dest_reg), qword [Rq(base_reg)+mem_ofs];
    } else {
        DIE("Unsupported load size: %d\n", width);
    }
}

/* and_load_idx: out &= [base + index * 8].
 * values[0] = output, values[1] = first input (asserted equal to output),
 * values[2] = base, values[3] = index, args[0] = scale (only 8 is
 * implemented), args[1] = width in bytes (1, 2, 4 or 8). */
MVM_JIT_TILE_DECL(and_load_idx) {
    MVMint8  dest_reg  = tile->values[0];
    MVMint8  left_reg  = tile->values[1];
    MVMint8  base_reg  = tile->values[2];
    MVMint8  index_reg = tile->values[3];
    MVMint32 scale     = tile->args[0];
    MVMint32 width     = tile->args[1];
    assert(dest_reg == left_reg);
    if (scale != 8) {
        DIE("IDX Scale %d NYI\n", scale);
    }
    if (width == 1) {
        | and Rb(dest_reg), byte [Rq(base_reg)+Rq(index_reg)*8];
    } else if (width == 2) {
        | and Rw(dest_reg), word [Rq(base_reg)+Rq(index_reg)*8];
    } else if (width == 4) {
        | and Rd(dest_reg), dword [Rq(base_reg)+Rq(index_reg)*8];
    } else if (width == 8) {
        | and Rq(dest_reg), qword [Rq(base_reg)+Rq(index_reg)*8];
    } else {
        DIE("Unsupported load size: %d\n", width);
    }
}

/* mul_reg: 64-bit signed multiply (imul), operand[0] *= operand[1]. Operands
 * are resolved by ensure_two_operand_pre. */
MVM_JIT_TILE_DECL(mul_reg) {
    MVMint8 operand[2];
    ensure_two_operand_pre(tc, compiler, tile, operand);
    | imul Rq(operand[0]), Rq(operand[1]);
    ensure_two_operand_post(tc, compiler, tile, operand);
}

/* or_reg: 64-bit bitwise OR, operand[0] |= operand[1]. Operands are resolved
 * by ensure_two_operand_pre. */
MVM_JIT_TILE_DECL(or_reg) {
    MVMint8 operand[2];
    ensure_two_operand_pre(tc, compiler, tile, operand);
    | or Rq(operand[0]), Rq(operand[1]);
    ensure_two_operand_post(tc, compiler, tile, operand);
}

/* xor_reg: 64-bit bitwise XOR, operand[0] ^= operand[1]. Operands are
 * resolved by ensure_two_operand_pre. */
MVM_JIT_TILE_DECL(xor_reg) {
    MVMint8 operand[2];
    ensure_two_operand_pre(tc, compiler, tile, operand);
    | xor Rq(operand[0]), Rq(operand[1]);
    ensure_two_operand_post(tc, compiler, tile, operand);
}

/* not_reg: 64-bit bitwise NOT, performed in place. values[0] = output,
 * values[1] = input; the two are asserted to be the same register. */
MVM_JIT_TILE_DECL(not_reg) {
    MVMint8 dest_reg = tile->values[0];
    MVMint8 src_reg  = tile->values[1];
    assert(dest_reg == src_reg);
    | not Rq(dest_reg);
}

/* sub_reg: 64-bit integer subtract, operand[0] -= operand[1]. Operands are
 * resolved by ensure_two_operand_pre (output must equal the first input). */
MVM_JIT_TILE_DECL(sub_reg) {
    MVMint8 operand[2];
    ensure_two_operand_pre(tc, compiler, tile, operand);
    | sub Rq(operand[0]), Rq(operand[1]);
    ensure_two_operand_post(tc, compiler, tile, operand);
}

/* sub_const: out -= constant (64-bit subtract with a 32-bit immediate).
 * values[0] = output, values[1] = input (asserted equal to output),
 * args[0] = constant, args[1] = its size (read but unused). */
MVM_JIT_TILE_DECL(sub_const) {
    MVMint32 imm_val  = tile->args[0];
    MVMint32 imm_size = tile->args[1];   /* unused */
    MVMint8  dest_reg = tile->values[0];
    MVMint8  left_reg = tile->values[1];
    assert(dest_reg == left_reg);
    | sub Rq(dest_reg), imm_val;
}

/* sub_load_addr: out -= [base + offset].
 * values[0] = output, values[1] = first input (asserted equal to output),
 * values[2] = base, args[0] = offset, args[1] = width in bytes (1, 2, 4, 8).
 * Any other width is reported via DIE. */
MVM_JIT_TILE_DECL(sub_load_addr) {
    MVMint8  dest_reg = tile->values[0];
    MVMint8  left_reg = tile->values[1];
    MVMint8  base_reg = tile->values[2];
    MVMint32 mem_ofs  = tile->args[0];
    MVMint32 width    = tile->args[1];
    assert(dest_reg == left_reg);
    if (width == 1) {
        | sub Rb(dest_reg), byte [Rq(base_reg)+mem_ofs];
    } else if (width == 2) {
        | sub Rw(dest_reg), word [Rq(base_reg)+mem_ofs];
    } else if (width == 4) {
        | sub Rd(dest_reg), dword [Rq(base_reg)+mem_ofs];
    } else if (width == 8) {
        | sub Rq(dest_reg), qword [Rq(base_reg)+mem_ofs];
    } else {
        DIE("Unsupported load size: %d\n", width);
    }
}

/* sub_load_idx: out -= [base + index * 8].
 * values[0] = output, values[1] = first input, values[2] = base,
 * values[3] = index, args[0] = scale (only 8 is implemented),
 * args[1] = width in bytes (1, 2, 4 or 8).
 *
 * The emitted `sub` reads and writes the output register, so the result is
 * only in1 - [mem] when the output register already holds in1. Like the other
 * two-operand tiles in this file, this therefore requires out == in1. */
MVM_JIT_TILE_DECL(sub_load_idx) {
    MVMint8 out  = tile->values[0];
    MVMint8 in1  = tile->values[1];
    MVMint8 base = tile->values[2];
    MVMint8 idx  = tile->values[3];
    MVMint32 scl  = tile->args[0];
    MVMint32 size = tile->args[1];
    assert(out == in1);
    if (scl != 8)
        DIE("IDX Scale %d NYI\n", scl);
    switch (size) {
    case 1:
        | sub Rb(out), byte [Rq(base)+Rq(idx)*8];
        break;
    case 2:
        | sub Rw(out), word [Rq(base)+Rq(idx)*8];
        break;
    case 4:
        | sub Rd(out), dword [Rq(base)+Rq(idx)*8];
        break;
    case 8:
        | sub Rq(out), qword [Rq(base)+Rq(idx)*8];
        break;
    default:
        DIE("Unsupported load size: %d\n", size);
    }
}




/* test: set the flags according to a register value (test reg, reg).
 * values[1] = register to test; the operand width follows tile->size. */
MVM_JIT_TILE_DECL(test) {
    MVMint8 subject = tile->values[1];
    if (tile->size == 1) {
        | test Rb(subject), Rb(subject);
    } else if (tile->size == 2) {
        | test Rw(subject), Rw(subject);
    } else if (tile->size == 4) {
        | test Rd(subject), Rd(subject);
    } else {
        /* Size 8 and every other size end up here. NB - this is a hack:
         * because no size is assigned to the result of CALL, its size would
         * be 0 and nothing would be compiled. A better fix would be to assign
         * a result size to CALL. */
        | test Rq(subject), Rq(subject);
    }
}



/* test_addr: compare the memory operand [base + offset] against 0.
 * values[1] = base, args[0] = offset, args[1] = width in bytes (1, 2, 4, 8).
 * Any other width is reported via DIE. */
MVM_JIT_TILE_DECL(test_addr) {
    MVMint32 mem_ofs  = tile->args[0];
    MVMint32 width    = tile->args[1];
    MVMint8  base_reg = tile->values[1];
    /* TODO (from the original author) - this can possibly be OR [addr], 0,
     * without loss of generality */
    if (width == 1) {
        | cmp byte [Rq(base_reg)+mem_ofs], 0;
    } else if (width == 2) {
        | cmp word [Rq(base_reg)+mem_ofs], 0;
    } else if (width == 4) {
        | cmp dword [Rq(base_reg)+mem_ofs], 0;
    } else if (width == 8) {
        | cmp qword [Rq(base_reg)+mem_ofs], 0;
    } else {
        DIE("Unsupported size %d for load\n", width);
    }
}


/* test_idx: compare the memory operand [base + index * 8] against 0.
 * values[1] = base, values[2] = index, args[0] = scale (only 8 is
 * implemented), args[1] = width in bytes (1, 2, 4 or 8).
 * Any other width is reported via DIE. */
MVM_JIT_TILE_DECL(test_idx) {
    MVMint8 base = tile->values[1];
    MVMint8 idx  = tile->values[2];
    MVMint32 scl = tile->args[0];
    MVMint32 size = tile->args[1];
    if (scl != 8)
        DIE("Scale %d NYI\n", scl);
    switch(size) {
    case 1:
        | cmp byte [Rq(base)+Rq(idx)*8], 0;
        break;
    case 2:
        | cmp word [Rq(base)+Rq(idx)*8], 0;
        break;
    case 4:
        | cmp dword [Rq(base)+Rq(idx)*8], 0;
        break;
    case 8:
        | cmp qword [Rq(base)+Rq(idx)*8], 0;
        break;
    default:
        /* Report the value that was actually switched on (args[1]), not
         * tile->size, so the message names the offending width. */
        DIE("Unsupported size %d for load\n", size);
    }
}

/* test_and: set the flags from the bitwise AND of two registers without
 * storing the result. values[1] and values[2] are the operands; the operand
 * width follows tile->size. Sizes other than 1, 2, 4 or 8 emit nothing. */
MVM_JIT_TILE_DECL(test_and) {
    MVMint8 first_reg  = tile->values[1];
    MVMint8 second_reg = tile->values[2];
    if (tile->size == 1) {
        | test Rb(second_reg), Rb(first_reg);
    } else if (tile->size == 2) {
        | test Rw(second_reg), Rw(first_reg);
    } else if (tile->size == 4) {
        | test Rd(second_reg), Rd(first_reg);
    } else if (tile->size == 8) {
        | test Rq(second_reg), Rq(first_reg);
    }
}

/* test_const: set the flags from the bitwise AND of a register and an
 * immediate. values[1] = register, args[0] = constant; the operand width
 * follows tile->size (1, 2 or 4). Other sizes are reported via DIE. */
MVM_JIT_TILE_DECL(test_const) {
    MVMint32 imm_val = tile->args[0];
    MVMint8  subject = tile->values[1];
    if (tile->size == 1) {
        | test Rb(subject), imm_val;
    } else if (tile->size == 2) {
        | test Rw(subject), imm_val;
    } else if (tile->size == 4) {
        | test Rd(subject), imm_val;
    } else {
        DIE("Unsupported size of constant");
    }
}

/* test_addr_const: set the flags from the bitwise AND of the memory operand
 * [base + offset] and an immediate.
 * values[1] = base; args are $ofs $lsize $val $csize, of which args[0]
 * (offset) and args[2] (constant) are used. The operand width follows
 * tile->size (1, 2 or 4); other sizes are reported via DIE. */
MVM_JIT_TILE_DECL(test_addr_const) {
    MVMint8  base_reg = tile->values[1];
    MVMint32 mem_ofs  = tile->args[0];
    MVMint32 imm_val  = tile->args[2];
    if (tile->size == 1) {
        | test byte [Rq(base_reg)+mem_ofs], imm_val;
    } else if (tile->size == 2) {
        | test word [Rq(base_reg)+mem_ofs], imm_val;
    } else if (tile->size == 4) {
        | test dword [Rq(base_reg)+mem_ofs], imm_val;
    } else {
        DIE("Unsupported size of constant");
    }
}

/* test_num: compare a floating point register against zero.
 * values[1] = register to test (mapped through REG_NUM). xmm0 is zeroed and
 * used as the comparison operand, so its previous contents are lost. */
MVM_JIT_TILE_DECL(test_num) {
    MVMint8 xmm_subject = REG_NUM(tile->values[1]);
    | xorpd xmm0, xmm0;
    | ucomisd xmm(xmm_subject), xmm0;
}


/* cmp: integer compare of two registers (left, right), setting the flags.
 * values[1] = left operand, values[2] = right operand; the operand width
 * follows tile->size. Sizes other than 1, 2, 4 or 8 emit nothing. */
MVM_JIT_TILE_DECL(cmp) {
    MVMint8 left_reg  = tile->values[1];
    MVMint8 right_reg = tile->values[2];
    if (tile->size == 1) {
        | cmp Rb(left_reg), Rb(right_reg);
    } else if (tile->size == 2) {
        | cmp Rw(left_reg), Rw(right_reg);
    } else if (tile->size == 4) {
        | cmp Rd(left_reg), Rd(right_reg);
    } else if (tile->size == 8) {
        | cmp Rq(left_reg), Rq(right_reg);
    }
}

/* cmp_num: floating point compare (ucomisd) of two xmm registers.
 * values[1] = left operand, values[2] = right operand, both mapped through
 * REG_NUM. */
MVM_JIT_TILE_DECL(cmp_num) {
    MVMint8 xmm_right = REG_NUM(tile->values[2]);
    MVMint8 xmm_left  = REG_NUM(tile->values[1]);
    | ucomisd xmm(xmm_left), xmm(xmm_right);
}


/* flagval: turn the outcome of a preceding comparison (held in the CPU flags)
 * into a 0/1 value in the output register (values[0]).
 *
 * The condition is taken from the operator of the first child of this tile's
 * node. Floating point comparisons use a separate table of set<cc>
 * instructions from integer ones. An operator not listed aborts. */
MVM_JIT_TILE_DECL(flagval) {
    /* Intel SDM Volume 1, ch5.1.6 - Bit and Byte Instructions */
    MVMint8 out = tile->values[0];
    MVMint32 child = MVM_JIT_EXPR_LINKS(tree, tile->node)[0];
    enum MVMJitExprOperator flag  = tree->nodes[child];
    /* This isn't exactly elegant, better mechanism wanted */
    MVMuint8 test_type = MVM_JIT_EXPR_INFO(tree, child)->type;
    if (test_type == MVM_reg_num32 || test_type == MVM_reg_num64) {
        /* Floating point: the below/above condition codes are used, and for
         * LT/LE/EQ/NE the parity flag is combined in through al (al is
         * reserved for tiles, see the note at the top of the file).
         * NOTE(review): presumably this handles the unordered (NaN) result of
         * ucomisd, which sets the parity flag - confirm against the SDM. */
        switch (flag) {
        case MVM_JIT_LT:
            /* out = below && !parity */
            | setnp al;
            | setb Rb(out);
            | and Rb(out), al;
            break;
        case MVM_JIT_LE:
            /* out = below-or-equal && !parity */
            | setnp al;
            | setbe Rb(out);
            | and Rb(out), al;
            break;
        case MVM_JIT_ZR:
        case MVM_JIT_EQ:
            /* out = zero && !parity */
            | setnp al;
            | setz Rb(out);
            | and Rb(out), al;
            break;
        case MVM_JIT_NZ:
        case MVM_JIT_NE:
            /* out = !zero || parity */
            | setp al;
            | setnz Rb(out);
            | or Rb(out), al;
            break;
        case MVM_JIT_GE:
            | setae Rb(out);
            break;
        case MVM_JIT_GT:
            | seta Rb(out);
            break;
        default:
            abort();
        }
    } else {
        /* Integer: signed condition codes */
        switch (flag) {
        case MVM_JIT_LT:
            | setl Rb(out);
            break;
        case MVM_JIT_LE:
            | setle Rb(out);
            break;
        case MVM_JIT_ZR:
        case MVM_JIT_EQ:
            | setz Rb(out);
            break;
        case MVM_JIT_NZ:
        case MVM_JIT_NE:
            | setnz Rb(out);
            break;
        case MVM_JIT_GE:
            | setge Rb(out);
            break;
        case MVM_JIT_GT:
            | setg Rb(out);
            break;
        default:
            abort();
        }
    }
    /* XXX THIS IS A HACK
     *
     * The size cast is supposed to be applied by the expression template
     * builder, but that subtly doesn't work: it's not applied for STORE
     * operands, and when it is, it causes even subtler errors with CONST
     * arguments. (const_i64_16 returns a 64 bit signed integer as a 16 bit
     * signed integer argument, and currently CONST doesn't have a sign, and the
     * tile yielding the value (fortunately) doesn't respect the size.)
     *
     * So the byte result is zero-extended to the full register here. */

    | movzx Rq(out), Rb(out);
}


/* mark: define the dynamic label numbered args[0] at the current position. */
MVM_JIT_TILE_DECL(mark) {
    MVMint32 label_no = tile->args[0];
    |=>(label_no):
}

/* label: load the address of the dynamic label numbered args[0] into the
 * output register (values[0]). */
MVM_JIT_TILE_DECL(label) {
    MVMint32 label_no = tile->args[0];
    MVMint8  dest_reg = tile->values[0];
    | lea Rq(dest_reg), [=>label_no];
}

/* branch_label: unconditional jump. A non-negative args[0] is the number of
 * the dynamic label to jump to; a negative value jumps to the global `exit`
 * label instead. */
MVM_JIT_TILE_DECL(branch_label) {
    MVMint32 label_no = tile->args[0];
    if (label_no < 0) {
        | jmp ->exit;
    } else {
        | jmp =>(label_no);
    }
}



/* After a call: if the tile yields a value, copy the return value into the
 * tile's output register (values[0]) - from rax when the output is a general
 * purpose register, from xmm0 otherwise. Emits nothing if the tile yields no
 * value. */
static void move_call_value(MVMThreadContext *tc, MVMJitCompiler *compiler, MVMJitTile *tile) {
    if (MVM_JIT_TILE_YIELDS_VALUE(tile)) {
        MVMint8 result_reg = tile->values[0];
        if (IS_GPR(result_reg)) {
            | mov Rq(result_reg), rax;
        } else {
            | movsd xmm(REG_NUM(result_reg)), xmm0;
        }
    }
}

/* call: indirect call through the register in values[1], then move the
 * return value into place via move_call_value. */
MVM_JIT_TILE_DECL(call) {
    MVMint8 target_reg = tile->values[1];
    | call Rq(target_reg);
    move_call_value(tc, compiler, tile);
}

/* call_func: call a function whose address is a constant. args[0] indexes
 * tree->constants (member .u); the call is emitted with callp and the return
 * value is then moved into place via move_call_value. */
MVM_JIT_TILE_DECL(call_func) {
    uintptr_t func_addr = tree->constants[tile->args[0]].u;
    | callp func_addr;
    move_call_value(tc, compiler, tile);
}


/* call_addr: call through the function pointer stored at [base + offset].
 * values[1] = base, args[0] = offset. The return value is then moved into
 * place via move_call_value. */
MVM_JIT_TILE_DECL(call_addr) {
    MVMint32 mem_ofs  = tile->args[0];
    MVMint8  base_reg = tile->values[1];
    | call qword [Rq(base_reg)+mem_ofs];
    move_call_value(tc, compiler, tile);
}
